{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from scipy.stats.distributions import genextreme\n",
    "import matplotlib.pyplot as plt\n",
    "import dautil as dl\n",
    "import numpy as np\n",
    "from IPython.display import HTML"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Cumulative distribution function of the generalized extreme value\n",
    "# distribution, followed by the inequality that bounds its support.\n",
    "gev_cdf = r'F(x;\\mu,\\sigma,\\xi) = \\exp\\left\\{-\\left[1+\\xi\\left(\\frac{x-\\mu}{\\sigma}\\right)\\right]^{-1/\\xi}\\right\\}'\n",
    "gev_support = r'1+\\xi(x-\\mu)/\\sigma>0'\n",
    "\n",
    "lr = dl.nb.LatexRenderer(chapter=3, start=11)\n",
    "\n",
    "# Render the CDF first, then its constraint.\n",
    "lr.render(gev_cdf)\n",
    "lr.render(gev_support)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def set_labels(ax):\n",
    "    \"\"\"Put the RAIN header on the x-axis and 'Probability' on the y-axis.\n",
    "\n",
    "    Args:\n",
    "        ax: Object exposing ``set_xlabel`` and ``set_ylabel``.\n",
    "    \"\"\"\n",
    "    rain_header = dl.data.Weather.get_header('RAIN')\n",
    "    ax.set_xlabel(rain_header)\n",
    "    ax.set_ylabel('Probability')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def run_sims(nsims, seed=19, ndays=365):\n",
    "    \"\"\"Simulate summed samples from the fitted distribution.\n",
    "\n",
    "    Relies on the notebook globals ``dist``, ``shape``, ``loc``, ``scale``\n",
    "    and ``years``, which must exist before the first call.\n",
    "\n",
    "    Args:\n",
    "        nsims: Number of simulation rounds. Every round produces one\n",
    "            summed sample per entry in ``years``.\n",
    "        seed: Value handed to ``np.random.seed`` before sampling. This\n",
    "            resets NumPy's global random state on every call.\n",
    "        ndays: Number of values drawn and added up for each summed sample.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(sums, low, high)``: the array of summed samples and the\n",
    "        two values returned by ``dl.stats.ci(sums)``.\n",
    "    \"\"\"\n",
    "    np.random.seed(seed)\n",
    "\n",
    "    # One draw per (round, year) pair, generated in a single flat sequence.\n",
    "    ndraws = nsims * len(years)\n",
    "    sums = np.array([dist.rvs(shape, loc, scale, size=ndays).sum()\n",
    "                     for _ in range(ndraws)])\n",
    "    low, high = dl.stats.ci(sums)\n",
    "\n",
    "    return sums, low, high"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Rain observations with missing values dropped.\n",
    "rain_all = dl.data.Weather.load()['RAIN'].dropna()\n",
    "\n",
    "# Total rain per calendar year. Grouping on the index year avoids the\n",
    "# `how=` keyword of resample(), which later pandas releases removed.\n",
    "# Unlike resample(), years without any observation get no entry.\n",
    "annual_sums = rain_all.groupby(rain_all.index.year).sum()\n",
    "years = np.unique(rain_all.index.year)\n",
    "\n",
    "# Keep only the observations above the 95th percentile and wrap them,\n",
    "# together with scipy's genextreme, in the Distribution helper.\n",
    "limit = np.percentile(rain_all, 95)\n",
    "rain = rain_all[rain_all > limit]\n",
    "dist = dl.stats.Distribution(rain, genextreme)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Parameters returned by dist.fit(); later cells reuse all three names.\n",
    "shape, loc, scale = dist.fit()\n",
    "\n",
    "# One-row table holding the three parameters.\n",
    "param_names = ['shape', 'loc', 'scale']\n",
    "table = dl.report.DFBuilder(param_names)\n",
    "table.row([shape, loc, scale])\n",
    "dl.options.set_pd_options()\n",
    "\n",
    "# Start the HTML report and add the parameter table as its first section.\n",
    "html_builder = dl.report.HTMLBuilder()\n",
    "html_builder.h1('Exploring Extreme Values')\n",
    "html_builder.h2('Distribution Parameters')\n",
    "params_df = table.build()\n",
    "html_builder.add_df(params_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# dist.pdf() is called before dist.describe_residuals(); keep that order.\n",
    "pdf = dist.pdf(shape, loc, scale)\n",
    "residuals = dist.describe_residuals()\n",
    "\n",
    "# Append the residuals summary to the report as an HTML table.\n",
    "html_builder.h2('Residuals of the Fit')\n",
    "residuals_html = residuals.to_html()\n",
    "html_builder.add(residuals_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# One-row table with the values of dist.mean_ad() and dist.rmse().\n",
    "metric_names = ['Mean_AD', 'RMSE']\n",
    "table2 = dl.report.DFBuilder(metric_names)\n",
    "metric_values = [dist.mean_ad(), dist.rmse()]\n",
    "table2.row(metric_values)\n",
    "\n",
    "# Append the metrics to the report under their own heading.\n",
    "html_builder.h2('Fit Metrics')\n",
    "metrics_df = table2.build()\n",
    "html_builder.add_df(metrics_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Show Matplotlib figures inline in the notebook output.\n",
    "%matplotlib inline\n",
    "dl.options.mimic_seaborn()\n",
    "# The context is named 'extreme_values'; the same object is later passed\n",
    "# to the Subplotter that draws the 2x2 figure.\n",
    "context = dl.nb.Context('extreme_values')\n",
    "# NOTE(review): presumably these widgets let the reader adjust rc settings\n",
    "# and the labels of the 2x2 subplots interactively -- confirm against dautil.\n",
    "dl.nb.RcWidget(context)\n",
    "dl.nb.LabelWidget(2, 2, context)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "sp = dl.plotting.Subplotter(2, 2, context)\n",
    "\n",
    "# First panel: histogram of annual_sums.\n",
    "# NOTE(review): `normed` was replaced by `density` in later Matplotlib\n",
    "# releases; it is kept because older releases do not accept `density`.\n",
    "sp.ax.hist(annual_sums, normed=True, bins=dl.stats.sqrt_bins(annual_sums))\n",
    "sp.label()\n",
    "set_labels(sp.ax)\n",
    "\n",
    "# Second panel: histogram of the sums simulated by the 100-round run.\n",
    "sp.next_ax()\n",
    "sp.label()\n",
    "sp.ax.set_xlim([5000, 10000])\n",
    "nsims = [25, 50, 100, 200]\n",
    "\n",
    "# Each run returns (sums, low, high). The sums arrays differ in length\n",
    "# between runs, so the results stay in a plain list: packing them into one\n",
    "# np.array would build a ragged array, which current NumPy rejects.\n",
    "sims = [run_sims(n) for n in nsims]\n",
    "lows = [low for _, low, _ in sims]\n",
    "highs = [high for _, _, high in sims]\n",
    "sums_100 = sims[nsims.index(100)][0]\n",
    "\n",
    "sp.ax.hist(sums_100, normed=True, bins=dl.stats.sqrt_bins(sums_100))\n",
    "set_labels(sp.ax)\n",
    "\n",
    "# Third panel: histogram of the values in `rain` with the PDF drawn on top.\n",
    "sp.next_ax()\n",
    "sp.label()\n",
    "sp.ax.set_xlim([10, 40])\n",
    "sp.ax.hist(rain, bins=dist.nbins, normed=True, label='Rain')\n",
    "sp.ax.plot(dist.x, pdf, label='PDF')\n",
    "set_labels(sp.ax)\n",
    "sp.ax.legend(loc='best')\n",
    "\n",
    "# Fourth panel: the two bounds from every run against the number of rounds.\n",
    "sp.next_ax()\n",
    "sp.ax.plot(nsims, lows, 'o', label='2.5 percentile')\n",
    "sp.ax.plot(nsims, highs, 'x', label='97.5 percentile')\n",
    "sp.ax.legend(loc='center')\n",
    "sp.label(ylabel_params=dl.data.Weather.get_header('RAIN'))\n",
    "\n",
    "plt.tight_layout()\n",
    "HTML(html_builder.html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
